import os
import subprocess
import sys

# Launcher script: starts one training process per seed and reports how each
# run ended.

# Run parameters
env = "MPE"
# NOTE(review): `hanabi` is not referenced anywhere below. It looks like a
# scenario name that may have been intended for the training command —
# confirm whether it should be passed along.
hanabi = "simple_spread"
num_landmarks = 3
num_agents = 3
algo = "rmappo"  # "mappo" "ippo"
exp = "check"
seed_max = 1

# File-descriptor limit (`ulimit -n 22222`) is not applicable on Windows,
# so no equivalent step is performed here.

# Print the run configuration
print(f"env is {env}, algo is {algo}, exp is {exp}, max seed is {seed_max}")

# Restrict the child processes to GPU 0. The value is the same for every
# seed, so it is set once; children inherit this process's environment.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Run training once per seed
for seed in range(1, seed_max + 1):
    print(f"seed is {seed}:")

    # Build the command line for this seed
    command = [
        # Launch with the interpreter running this script so the child uses
        # the same environment; fall back to PATH lookup if it is unknown.
        sys.executable or "python",
        # Raw string: in a normal literal the "\t" of "\train" is a TAB
        # character, which silently corrupts the path.
        # NOTE(review): the path has no ".py" suffix — confirm it points at
        # the intended training entry point.
        r"E:\Code_file\Python\on-policy-main\onpolicy\scripts\train",
        "--env_name",
        env,
        "--algorithm_name",
        algo,
        "--experiment_name",
        exp,
        "--num_agents",
        str(num_agents),
        "--num_landmarks",
        str(num_landmarks),
        "--seed",
        str(seed),
        "--n_training_threads",
        "1",
        "--n_rollout_threads",
        "1000",
        "--num_mini_batch",
        "1",
        "--episode_length",
        "100",
        "--num_env_steps",
        "10000000000000",
        "--ppo_epoch",
        "15",
        "--gain",
        "0.01",
        "--lr",
        "7e-4",
        "--critic_lr",
        "1e-3",
        "--hidden_size",
        "512",
        "--layer_N",
        "2",
        "--entropy_coef",
        "0.015",
    ]

    # Run the command and wait for it to finish
    result = subprocess.run(command)

    # Only report success when the child exited cleanly; otherwise surface
    # the exit code and carry on with the next seed (as the loop did before).
    if result.returncode == 0:
        print("training is done!")
    else:
        print(f"training failed for seed {seed} (exit code {result.returncode})")
